################################################
# Analyzing survey data, unweighted, with neighborhood clustering 
# Created AJH
# Created 4/27/2022
# Last edited 6/10/2022
################################################
library(tidyverse)
library(srvyr)
library(survey)
library(stargazer)
library(sf)
library(gridExtra)
library(miceadds)

# No weights, clustering

# load in data
load("citizen_public_service_survey/pooled_weighted_five.Rdata")
# join with rationing data
load("chapter_5_rewarding_responsiveness/additional_data/tandeo_colonias.Rdata") 
# note that there are about 200 observations in which the person answered that their colonia was "other". For now, just assign their tandeo status as NA
dat <- left_join(dat, tandeo_colonias[,c("cve_col", "tandeo_2021")], by ="cve_col")


# Make a figure of water hours 
dat$water_supply_cat <- case_when(
  dat$water_hours_weekly <5~ "<5",
  dat$water_hours_weekly >5 & dat$water_hours_weekly <= 15 ~ "5-15",
  dat$water_hours_weekly >15 & dat$water_hours_weekly <= 30 ~ "15-30",
  dat$water_hours_weekly >30 & dat$water_hours_weekly <= 45  ~ "30-45",
  dat$water_hours_weekly >45 & dat$water_hours_weekly <= 60  ~ "45-60",
  dat$water_hours_weekly >60 & dat$water_hours_weekly <= 75  ~ "60-75",
  dat$water_hours_weekly >75 & dat$water_hours_weekly <= 90  ~ "75-90",
  dat$water_hours_weekly >90 & dat$water_hours_weekly <= 105  ~ "90-105",
  dat$water_hours_weekly >105 & dat$water_hours_weekly <= 120  ~ "105-120",
  dat$water_hours_weekly >120 & dat$water_hours_weekly <= 135  ~ "120-135",
  dat$water_hours_weekly >135 & dat$water_hours_weekly <= 150  ~ "135-150",
  dat$water_hours_weekly >150 & dat$water_hours_weekly < 168 ~ "150-167",
  dat$water_hours_weekly == 168~ "Continuous"
)
dat$water_supply_cat = factor(dat$water_supply_cat, levels = c("5-15", "15-30","30-45","45-60","60-75","75-90",
                                                               "90-105", "105-120", "120-135","135-150", "150-167","Continuous" ))

dat$tandeo_2021 <- factor(dat$tandeo_2021, levels= c(1,0), labels= c("On Rationing Lists", "Not on Rationing Lists"))



########
# Places with intermittent water supply have more outages, make more requests and get trucks more frequently 
########

# For appendix: Do this in regression form, using intermittency and water hours
# I only replicate the plots, not the individual regression tables 
# outage
dat <- dat[!is.na(dat$cve_col),]
m1_1 <- glm.cluster(formula = outage_yesno ~ intermittent, data=dat, family ="quasibinomial", cluster = "cve_col")
m1_2 <- glm.cluster(formula = outage_yesno ~ water_service, data= dat, family ="quasibinomial", cluster= "cve_col")
m1_3 <- glm.cluster(formula = outage_yesno ~ water_hours_weekly, data= dat, family ="quasibinomial",cluster = "cve_col")

# appeal
m1_4 <- glm.cluster(formula = claim_water ~ intermittent, data= dat, family ="quasibinomial", cluster = "cve_col")
m1_5 <- glm.cluster(formula = claim_water ~ water_service, data= dat, family ="quasibinomial", cluster = "cve_col")
m1_6 <- glm.cluster(formula = claim_water ~ water_hours_weekly, data= dat, family ="quasibinomial", cluster = "cve_col")

#trucks
m1_7 <- glm.cluster(formula = truck_any ~ intermittent, data= dat, family ="quasibinomial", cluster = "cve_col")
m1_8 <- glm.cluster(formula = truck_any ~ water_service, data= dat, family ="quasibinomial", cluster= "cve_col")
m1_9 <- glm.cluster(formula = truck_any ~ water_hours_weekly, data= dat, family ="quasibinomial", cluster= "cve_col")

# Make a coefficient plot  for the main paper
coefs_phase1 <- data_frame(outcome = c("Experiencing\n a water outage", "Making a\n Water-Related\n Claim","Receiving \n a Water Tanker Truck"), 
                           intermittency_coef = c(coef(m1_1)[2], coef(m1_4)[2],coef(m1_7)[2]),
                           intermittency_lower =c(coef(m1_1)[2]- (1.96*0.2352), coef(m1_4)[2]-(1.96 *0.3394),coef(m1_7)[2]- (1.96*0.2583)),
                           intermittency_upper =c(coef(m1_1)[2]+ (1.96*0.2352), coef(m1_4)[2]+(1.96 *0.3394),coef(m1_7)[2]+ (1.96*0.2583)))

pz <- ggplot(data = coefs_phase1) + 
  geom_point(aes(x= outcome, y = intermittency_coef)) + 
  geom_errorbar(aes(x = outcome, ymin = intermittency_lower, ymax = intermittency_upper))+ 
  theme_classic() + 
  geom_hline(yintercept = 0, color = "blue", linetype = "dashed")+
  labs(x = "", y = "Coefficient on Intermittency", 
       title = "IWS Makes Residents More Likely \n to Experience Outages Make Claims, \n and Receive a Tanker Truck", 
       caption = "95% Confidence Interval")+
  theme(axis.title = element_text(size =15), 
        axis.text = element_text(size = 15))
ggsave(pz, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/unweighted_clustered/appendix_appeals_trucks_cls.jpg", width = 7)




# Does responsiveness shape political evaluations?
# some summary stats on evaluations of alcalde
dat$elec_alc_eval <- factor(dat$elec_alc_eval, 
                            levels = c("Muy Malo", "Malo", "Bueno","Muy bueno", "No sé"),
                            labels = c("Very poor", "Poor", "Good", "Very good", "Don't know"))



# First, make a plot showing the simple correlation coefficients with error bars. This goes in the main paper. 
m2_1 <- glm.cluster(alc_eval_num ~ intermittent, data = dat, family ="gaussian", cluster = "cve_col") # in general, places that have intermittent water evaluate their mayor less favorably 
m2_2 <- glm.cluster(alc_eval_num ~  claim_water, data= dat, family ="gaussian", cluster = "cve_col") # having made a claim is negatively associated with the evaluation of the mayor
m2_3 <- glm.cluster(alc_eval_num ~    alc_water_claim_respond, data= dat, family ="gaussian", cluster= "cve_col") # but getting a response is positively associated with the evaluation of the mayor 
m2_4 <- glm.cluster(alc_eval_num ~   alc_water_solve_partial, data= dat, family ="gaussian", cluster = "cve_col") # especially if the mayor actually solved your problem
m2_5 <- glm.cluster(alc_eval_num ~   alc_water_solve_complete, data= dat, family ="gaussian", cluster = "cve_col") # especially if the mayor actually solved your problem


coefs <- c(coef(m2_1)[2],coef(m2_2)[2],coef(m2_3)[2],coef(m2_4)[2],coef(m2_5)[2])
lower <- c(coef(m2_1)[2]-1.96*summary(m2_1)[4],coef(m2_2)[2]-1.96*summary(m2_2)[4],coef(m2_3)[2]-1.96*summary(m2_3)[4],coef(m2_4)[2]-1.96*summary(m2_1)[4],coef(m2_5)[2]-1.96*summary(m2_5)[4])
upper <- c(coef(m2_1)[2]+1.96*summary(m2_1)[4],coef(m2_2)[2]+1.96*summary(m2_2)[4],coef(m2_3)[2]+1.96*summary(m2_3)[4],coef(m2_4)[2]+1.96*summary(m2_4)[4],coef(m2_5)[2]+1.96*summary(m2_5)[4])
names <- c("Respondent has \n intermittent water \n (N=2302)", "Respondent made \n water-related appeal \n (N=2302)", "Mayor responded  \n to appeal \n (N=485)", "Mayor solved problem  \n at least partially \n (N = 486)", "Mayor completely  \n solved problem \n (N=486)")
coefs_all <- tibble(coefs, lower, upper, names)
coefs_all$names <- factor(coefs_all$names,
                          levels =  c("Respondent has \n intermittent water \n (N=2302)", "Respondent made \n water-related appeal \n (N=2302)", "Mayor responded  \n to appeal \n (N=485)", "Mayor solved problem  \n at least partially \n (N = 486)", "Mayor completely  \n solved problem \n (N=486)"))

coefs_all$names <- factor(coefs_all$names,
                          levels =c("Mayor completely  \n solved problem \n (N=486)","Mayor solved problem  \n at least partially \n (N = 486)","Mayor responded  \n to appeal \n (N=485)","Respondent made \n water-related appeal \n (N=2302)","Respondent has \n intermittent water \n (N=2302)"))


p_coefs <- ggplot(data= coefs_all)+ 
  geom_point(aes(x= names, y = coefs))+
  geom_errorbar(aes(x= names, ymin=lower, ymax=upper))+ 
  coord_flip() +
  theme_classic()+
  theme(axis.text = element_text(size=15))+
  theme(axis.title= element_text(size=15))+
  theme(title= element_text(size=18))+
  geom_hline(aes(yintercept= 0), color= "grey50", linetype = "dotted")+
  labs(title = "Evaluation of\n Incumbent Mayor", y = "Correlation Coefficient", x="", caption = "95% confidence intervals")

ggsave(p_coefs, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/unweighted_clustered/appendix_coefs_response_cls.jpg", width =7)

m2_1 <- glm.cluster(alc_eval_num ~ intermittent, data =dat, family ="gaussian",cluster = "cve_col" ) # in general, places that have intermittent water evaluate their mayor less favorably 
m2_2 <- glm.cluster(alc_eval_num ~ intermittent + claim_water,  data =dat, family ="gaussian",cluster = "cve_col") # having made a claim is negatively associated with the evaluation of the mayor
m2_3 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_claim_respond,  data =dat, family ="gaussian",cluster = "cve_col") # but getting a response is positively associated with the evaluation of the mayor 
m2_4 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_solve_partial,  data =dat, family ="gaussian",cluster = "cve_col") # especially if the mayor actually solved your problem
m2_5 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_solve_complete,  data =dat, family ="gaussian",cluster = "cve_col") # especially if the mayor actually solved your problem
#m2_6 <- svyglm(alc_eval_num ~   alc_water_claim_respond + alc_respond_binary, design= dat_we,family ="gaussian") # 

m2_1_1 <- glm.cluster(alc_eval_num ~ intermittent+copartisan, data = dat, family ="gaussian", cluster = "cve_col") # in general, places that have intermittent water evaluate their mayor less favorably 
m2_2_1 <- glm.cluster(alc_eval_num ~  claim_water +copartisan, data = dat, family ="gaussian", cluster = "cve_col") # having made a claim is negatively associated with the evaluation of the mayor
m2_3_1 <- glm.cluster(alc_eval_num ~   alc_water_claim_respond+copartisan, data = dat, family ="gaussian", cluster = "cve_col") # but getting a response is positively associated with the evaluation of the mayor 
m2_4_1 <- glm.cluster(alc_eval_num ~   alc_water_solve_partial +copartisan,data = dat, family ="gaussian", cluster = "cve_col") # especially if the mayor actually solved your problem
m2_5_1 <- glm.cluster(alc_eval_num ~  alc_water_solve_complete+copartisan, data = dat, family ="gaussian", cluster = "cve_col") # especially if the mayor actually solved your problem
#m2_6 <- svyglm(alc_eval_num ~   alc_water_claim_respond + alc_respond_binary, design= dat_we,family ="gaussian") # 


coefs_party <- c(coef(m2_1_1)[2],coef(m2_2_1)[2],coef(m2_3_1)[2],coef(m2_4_1)[2],coef(m2_5_1)[2])
lower_party <- c(coef(m2_1_1)[2]-1.96*summary(m2_1_1)[5],coef(m2_2_1)[2]-1.96*summary(m2_2_1)[5],coef(m2_3_1)[2]-1.96*summary(m2_3_1)[5],coef(m2_4_1)[2]-1.96*summary(m2_4_1)[5],coef(m2_5_1)[2]-1.96*summary(m2_5_1)[5])
upper_party <- c(coef(m2_1_1)[2]+1.96*summary(m2_1_1)[5],coef(m2_2_1)[2]+1.96*summary(m2_2_1)[5],coef(m2_3_1)[2]+1.96*summary(m2_3_1)[5],coef(m2_4_1)[2]+1.96*summary(m2_4_1)[5],coef(m2_5_1)[2]+1.96*summary(m2_5_1)[5])
names_party <- c("Respondent has \n intermittent water \n (N=2302)", "Respondent made \n water-related appeal \n (N=2302)", "Mayor responded  \n to appeal \n (N=485)", "Mayor solved problem  \n at least partially \n (N = 486)", "Mayor completely  \n solved problem \n (N=486)")
coefs_all_party <- tibble(coefs_party, lower_party, upper_party, names_party)
coefs_all_party$names_party <- factor(coefs_all_party$names_party,
                                      levels =  c("Respondent has \n intermittent water \n (N=2302)", "Respondent made \n water-related appeal \n (N=2302)", "Mayor responded  \n to appeal \n (N=485)", "Mayor solved problem  \n at least partially \n (N = 486)", "Mayor completely  \n solved problem \n (N=486)"))

coefs_all_party$names_party <- factor(coefs_all_party$names_party,
                                      levels =c("Mayor completely  \n solved problem \n (N=486)","Mayor solved problem  \n at least partially \n (N = 486)","Mayor responded  \n to appeal \n (N=485)","Respondent made \n water-related appeal \n (N=2302)","Respondent has \n intermittent water \n (N=2302)"))


p_coefs_party <- ggplot(data= coefs_all_party)+ 
  geom_point(aes(x= names_party, y = coefs_party))+
  geom_errorbar(aes(x= names_party, ymin=lower_party, ymax=upper_party))+ 
  coord_flip() +
  theme_classic()+
  theme(axis.text = element_text(size=15))+
  theme(axis.title= element_text(size=15))+
  theme(title= element_text(size=18))+
  geom_hline(aes(yintercept= 0), color= "grey50", linetype = "dotted")+
  labs(title = "Evaluation of\n Incumbent Mayor \n (With Controls for Copartisanship)", y = "Correlation Coefficient", x="", caption = "95% confidence interval")

ggsave(p_coefs_party, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/unweighted_clustered/appendix_coefs_response_party_cls.jpg", width = 8)




m2_1_2 <- glm.cluster(alc_eval_num ~ intermittent+factor(cve_alc), data = dat, family ="gaussian", cluster = "cve_col") # in general, places that have intermittent water evaluate their mayor less favorably 
m2_2_2 <- glm.cluster(alc_eval_num ~ intermittent + claim_water +factor(cve_alc),  data = dat, family ="gaussian", cluster = "cve_col") # having made a claim is negatively associated with the evaluation of the mayor
m2_3_2 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_claim_respond+factor(cve_alc),  data = dat, family ="gaussian", cluster = "cve_col") # but getting a response is positively associated with the evaluation of the mayor 
m2_4_2 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_solve_partial +factor(cve_alc),  data = dat, family ="gaussian", cluster = "cve_col") # especially if the mayor actually solved your problem
m2_5_2 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_solve_complete+factor(cve_alc),  data = dat, family ="gaussian", cluster = "cve_col") # especially if the mayor actually solved your problem



m2_1_3 <- glm.cluster(alc_eval_num ~ intermittent+factor(cve_alc)+party, data =dat, family ="gaussian", cluster = "cve_col") # in general, places that have intermittent water evaluate their mayor less favorably 
m2_2_3 <- glm.cluster(alc_eval_num ~ intermittent + claim_water +factor(cve_alc)+party,data =dat,  family ="gaussian", cluster = "cve_col") # having made a claim is negatively associated with the evaluation of the mayor
m2_3_3 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_claim_respond+factor(cve_alc) +party, data =dat,  family ="gaussian", cluster = "cve_col") # but getting a response is positively associated with the evaluation of the mayor 
m2_4_3 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_solve_partial +factor(cve_alc)+party, data =dat,  family ="gaussian", cluster = "cve_col") # especially if the mayor actually solved your problem
m2_5_3 <- glm.cluster(alc_eval_num ~  intermittent  +  alc_water_solve_complete+factor(cve_alc)+party, data =dat,  family ="gaussian", cluster = "cve_col") # especially if the mayor actually solved your problem
